'''Plot CoStar, Corelogic Examples Paper 2
Sean McCulloch 4/10/20'''
import os
import pandas as pd
import geopandas as gpd
import pyproj
from pyproj import Proj, transform
import shapely
from shapely.geometry import Point, Polygon
from shapely import wkt
from shapely.ops import transform
from descartes import PolygonPatch
from functools import partial
import matplotlib.pyplot as plt
import matplotlib.cm as cmx
import matplotlib.colors as colors
os.chdir('Y:\housing_regulation\gis\code')
proj_wgs84 = pyproj.Proj(init='epsg:4326')
def create_city_center(cbsa):
cbsa_coord_df = pd.read_stata('./city_center_coords.dta')
cbsa_coord_df['cbsacode'] = cbsa_coord_df['cbsacode'].apply(str)
cbsa_coord_df = cbsa_coord_df[cbsa_coord_df['cbsacode'] == cbsa]
cbsa_coord_df = cbsa_coord_df.reset_index(drop=True)
return cbsa_coord_df
def create_county_df():
counties_df = gpd.read_file('../shapefile/tl_2016_us_county.shp')
counties_df['STATE_COUNTY'] = counties_df.apply(lambda x: str(x['STATEFP']) + '_' + str(x['COUNTYFP']), axis = 1)
return counties_df
def create_county_list(cbsa):
counties = pd.read_csv('./countylist.txt', sep = '\t', header = None)
counties = counties.rename(columns = {0: 'countycode', 2: 'CBSAFP', 5: 'countyname'})
counties['CBSAFP'] = counties['CBSAFP'].apply(str)
counties['countycode'] = counties['countycode'].apply(str)
counties['STATE_COUNTY'] = counties.apply(lambda x: x['countycode'][0:2] + '_' + x['countycode'][-3:] if len(x['countycode']) == 5
else '0' + x['countycode'][0:1] + '_' + x['countycode'][-3:], axis = 1)
counties = counties[['STATE_COUNTY', 'CBSAFP', 'countyname']]
counties = counties[counties['CBSAFP'] == cbsa]
counties = list(counties.sort_values('countyname')['STATE_COUNTY'])
return counties
def create_county_name_list(cbsa):
counties = pd.read_csv('./countylist.txt', sep = '\t', header = None)
counties = counties.rename(columns = {0: 'countycode', 2: 'CBSAFP', 5: 'countyname'})
counties['CBSAFP'] = counties['CBSAFP'].apply(str)
counties['countycode'] = counties['countycode'].apply(str)
counties['STATE_COUNTY'] = counties.apply(lambda x: x['countycode'][0:2] + '_' + x['countycode'][-3:] if len(x['countycode']) == 5
else '0' + x['countycode'][0:1] + '_' + x['countycode'][-3:], axis = 1)
counties = counties[['STATE_COUNTY', 'CBSAFP', 'countyname']]
counties = counties[counties['CBSAFP'] == cbsa]
countynames = list(counties.sort_values('countyname')['countyname'])
counties = list(counties.sort_values('countyname')['STATE_COUNTY'])
return counties, countynames
def create_cbsa_counties_df(counties_df, counties):
counties_df['tokeep'] = counties_df.apply(lambda x: x['STATE_COUNTY'] in counties, axis = 1)
counties_df = counties_df[counties_df['tokeep'] == True]
return counties_df
def create_cbsas(cbsa):
cbsa_df = gpd.read_file('../shapefile/tl_2017_us_cbsa.shp')
cbsa_df = cbsa_df[cbsa_df['CBSAFP'] == cbsa]
return cbsa_df
#This method plots a ring of points x distance from lat, lon and then makes an ellipse.
def geodesic_point_buffer(lat, lon, km):
# Azimuthal equidistant projection
aeqd_proj = '+proj=aeqd +lat_0={lat} +lon_0={lon} +x_0=0 +y_0=0'
project = partial(
pyproj.transform,
pyproj.Proj(aeqd_proj.format(lat=lat, lon=lon)),
proj_wgs84)
buf = Point(0, 0).buffer(km * 1000) # distance in meters
return shapely.ops.transform(project, buf) #.exterior.coords[:]
def plot_by_cbsa_circle(points, cbsa, national_counties_df, pos = 'best'):
cbsa_df = create_cbsas(cbsa)
counties = create_county_list(cbsa)
city_center = create_city_center(cbsa)
base = create_cbsa_counties_df(national_counties_df, counties)
cbsabase = cbsa_df
if cbsa == "35620":
f, ax = plt.subplots(1, figsize=(62, 75))
else:
f, ax = plt.subplots(1, figsize=(28, 34))
ax.set_axis_off()
plt.axis('equal')
ax = cbsabase.plot(ax=ax, color='None', edgecolor='black', zorder = 20)
ax = base.plot(ax=ax, color='None', edgecolor='black', zorder = 30)
for county in counties:
county_center = base[base['STATE_COUNTY'] == county]
state_str, county_str = state_str, county_str = county.split('_')[0], county.split('_')[1]
try:
water_gdf = gpd.read_file('../shapefile/water/tl_2016_{}{}_areawater.shp'.format(state_str, county_str))
ax = water_gdf.plot(ax = ax, color = 'blue', alpha=1)
except:
pass
countyname = county_center['NAMELSAD'].iloc[-1]
county_centroid = county_center['geometry'].centroid
county_labels = '{}'.format(countyname)
gpd.GeoSeries(county_centroid).plot(ax = ax, marker= 'o', color = 'black', alpha=0, label = county_labels, markersize=50)
plt.text(county_centroid.x.tolist()[0], county_centroid.y.tolist()[0],
county_labels, size=15, rotation=-0.1, ha="right", va="top", zorder=50)
city_center_df = create_city_center(cbsa)
y_coord = city_center_df.iloc[0]['cbsa_longitude']
x_coord = city_center_df.iloc[0]['cbsa_latitude']
y_coord = float(y_coord)
x_coord = float(x_coord)
#Try plotting circle with point buffer rather than artist. Ellipsoids
km = 48.2803 # 48.2803 km ~ 30 miles
circle_points = geodesic_point_buffer(x_coord, y_coord, km)
circle_points_patch2 = PolygonPatch(circle_points, facecolor='None',
edgecolor= 'green', linewidth=3)
ax.add_patch(circle_points_patch2)
#Now add whatever points are being plotted.
x_values = points['latitude'].tolist()
y_values = points['longitude'].tolist()
#Scale the size of dots by number of observations
if points.shape[0] == 0:
print('No Points to Plot')
scalar = 22
elif points.shape[0] == 1:
scalar = 22
else:
scalar = 22 / (points.shape[0] / 100)
plt.scatter(y_values, x_values, color='red', s=scalar, zorder=20)
plt.show()
def plot_3maps(point_df, cbsa):
#Load in data
county = str(int(point_df['county'].iloc[0]))
q_points = pd.read_stata(r'Z:\data\paper2_casestudy\corelogic_counties_q\qcoords_'
+ county + '.dta')
q_points = q_points.rename(columns={'blocklevellongitude':'longitude',
'blocklevellatitude':'latitude'})
A_points = pd.read_stata(r'Z:\data\paper2_casestudy\corelogic_counties_A\Acoords_'
+ county + '.dta')
A_points = A_points.rename(columns={'blocklevellongitude':'longitude',
'blocklevellatitude':'latitude'})
q_nobs = str(int(q_points.shape[0]))
A_nobs = str(int(A_points.shape[0]))
print('CoStar Info:')
print(point_df.iloc[0])
print("-------------------")
print("Location of CoStar Sale:")
plot_by_cbsa_circle(point_df, str(cbsa), national_counties_df, 'best')
print("Locations of Hedonic q Corelogic Observations (Nobs:" + q_nobs + "):")
plot_by_cbsa_circle(q_points, str(cbsa), national_counties_df, 'best')
print("Locations of New Housing A Corelogic Observations (Nobs:" + A_nobs + "):")
plot_by_cbsa_circle(A_points, str(cbsa), national_counties_df, 'best')
national_counties_df = create_county_df()
#Plot Costar Point
costar_df = pd.read_stata(r'Z:\data\paper2_casestudy\costar_descN.dta')
cbsas = [41860, 12060, 42660, 31080, 35620, 19100]
for c in cbsas:
points_df = costar_df[costar_df['cbsa'] == c]
cbsaname = points_df['cbsaname'].iloc[0]
medz = points_df['med_z_qac'].iloc[0]
#Only take 6 obs closest to median
points_df = points_df.iloc[(points_df['z_perquartac']-medz).abs().argsort()[:6]]
#Limit to central observations
#length = points_df.shape[0]
#if length >= 8:
#points_df = points_df.iloc[round(length/2)-3 : round(length/2)+3]
print("CBSA: "+ cbsaname + ", 6 Central Obs with N Comment")
for i in range(points_df.shape[0]):
print("Example " + str(i + 1))
point_df = points_df.iloc[[i]]
plot_3maps(point_df, c)
print("-------------------------------------------------------------------------------")